
from lxml import etree
import requests
 
# Listing page to scrape (the query string is kept exactly as captured).
base_url = 'https://nj.58.com/chuzu/?PGTID=0d100000-000a-cc79-8837-fd66978f407b&ClickID=5'

# Without a timeout requests waits forever on a stalled connection.
response = requests.get(base_url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page and
# printing nothing.
response.raise_for_status()

# Parse the decoded body into an lxml element tree; `html` is the tree that
# the XPath queries further down run against.
html = etree.HTML(response.text)
 
# Find every listing <li> inside the results list.
li_list = html.xpath('//ul[@class="listUl"]/li')
for li in li_list:
    # Extract the details of a single listing.
    title = li.xpath('.//h2/a/text()')
    if not title:
        # No title link: skip this <li> (presumably not a real listing -
        # TODO confirm against the page markup).
        continue
    title = title[0].strip()

    # The first <p> text node presumably carries the layout/area summary
    # (verify against the page). It may be absent, so guard the index rather
    # than letting one malformed item abort the run with IndexError.
    square_parts = li.xpath('.//p[1]/text()')
    if square_parts:
        # Drop plain and non-breaking spaces used as padding in the markup.
        square = square_parts[0].replace(' ', '').replace('\xa0', '')
    else:
        square = ''
    print(title, square)


